#pragma once
#ifndef CHROMOSOME_NT_DATA_H
#define CHROMOSOME_NT_DATA_H

#include "ChrIndex2GeneName.h"
#include "ShortReadUtil.h"
#include "Filename.h"
#include <string>
#include <iostream>
#include <time.h>
#include "stdafx.h"
using namespace std;

// Capacity, in chars, of the fixed k-mer buffer CchromosomeNTdata::caKmer.
// NOTE(review): a leading underscore followed by an uppercase letter is a
// reserved identifier spelling in C++; the name is kept as-is because other
// files may refer to it.
static const int _MAX_KMER_LENGTH_ = 64;

/*
 * CchromosomeNTdata
 *
 * Reads a large chromosome file (fasta format) and hands out the fragment
 * for each k-mer of the sequence.
 *
 * NOTE(review): the class declares a destructor and holds a raw pointer
 * (caChromosome) but declares no copy constructor / copy assignment.
 * Whether copying an instance is safe depends on the implementation file;
 * confirm before passing objects of this type by value.
 */
class CchromosomeNTdata
{
public:
    // ---- construction / destruction ----
    CchromosomeNTdata();
    // Presumably loads the chromosome stored in Filename; see the implementation.
    CchromosomeNTdata(const char* Filename);
    ~CchromosomeNTdata();

    // ---- loading ----
    // Currently only .fasta / .fa input containing AaCcGgTt and N is accepted;
    // any other character is removed.
    // The meaning of the int return codes is defined by the implementation.
    int Constructor_Fasta(const char* Filename);
    // (sic: the misspelled name is part of the public interface.)
    // Presumably the loader for a preprocessed sequence file - TODO confirm.
    int Consrructor_PreSeq(const char* Filename);

    // ---- k-mer generation ----
    // Produce the next k-mer, starting at this->SlideWindowStart, and store it
    // in this->caKmer.
    char* fragKmer(unsigned int uiKmer_Length);
    // Produce the next k-mer consisting only of A/C/G/T and store it in
    // this->caKmer.
    char* fragACGTKmer(unsigned int uiKmer_Length);

    // ---- data (declaration order is significant; do not reorder) ----
    // Name of the input file.
    char caInputFileName[FILENAME_MAX];

    // Chromosome sequence buffer.
    char* caChromosome;
    ChrIndex2GeneName geneVec;

    // Buffer that receives the generated k-mer. Deliberately a fixed-size
    // in-object array rather than a free pointer.
    // NOTE(review): the capacity is exactly _MAX_KMER_LENGTH_ chars; confirm
    // whether the implementation needs room for a terminating '\0'.
    char caKmer[_MAX_KMER_LENGTH_];
    unsigned int iChromosome_size;
    // Set once the last fragment has been generated.
    bool end;

    // Start index used when generating the next k-mer fragment.
    unsigned int SlideWindowStart;

protected:
    // Judging by the name, strips bases other than A/C/G/T/N and gathers gene
    // names - TODO confirm against the implementation.
    unsigned int removedNonACGTNBaseAndCollectGeneName();

private:
    int initialization();

    // Obtains the chromosome size from a fasta file.
    // Temporarily unused because I/O on a PC is slow.
    int getsizeofChromosome(const char* Filename);
    int readFastaFileLineByLine(std::ifstream& ifile);
};

// Header record used with a preprocessed chromosome.
// NOTE(review): the exact meaning of the two counters (presumably the overall
// size versus the size of the stored sequence) is not visible here - confirm
// against the code that reads and writes this record.
struct _ch_header {
    unsigned int totalSize;
    unsigned int size;
};
typedef struct _ch_header ch_header;
#endif


